#************************************************************************
# File-Name: 	Replication_GridMiniGrid.R
#	  Date:  		02/03/2020
#	 Author: 	  Anjali Sharma, Shalu Agrawal, Johannes Urpelainen
#	Purpose:    .R file to replicate:
#					    Sharma, Anjali; Agrawal, Shalu; Urpelainen, Johannes
#					    "The Adoption and Use of Solar Mini-Grids in Grid-Electrified
#             Indian Villages"

#				      All survey data come from:
#			        Shalu Agrawal; Nidhi Bali; Johannes Urpelainen; Aseem Mahajan; Daniel Robert Thomas; 
#             Sidhartha Vermani; Ryan Kennedy; Smart Power India; Initiative for Sustainable Energy 
#             Policy, 2019, "Rural Electricity Demand in India (REDI)"	
					
#	************************************************************************
##loading data for Enterprise Data Analysis
# ************************************************************************
#Packages
library(reshape)
library(ggplot2)
library(stringi)

#Please modify data path if needed
setwd("~/Dropbox/Rockefeller Analysis/Manuscript_Anjali/R Studio")
data <- read.csv("Ent_data_full.csv")

#summarizing site codes
site_codes <- table(data$site_code)
#view site_codes
site_codes

#keeping only site_code 1 villages as analysis limited only to villages with MG intervention
onlyMGIV <- data[data$site_code == 1,]

#creating new categorical variable based on grid and mg use
onlyMGIV$grid_and_mg <- 99
onlyMGIV$grid_and_mg[onlyMGIV$q301_grid_use==1 & onlyMGIV$q335_mgrid_use==1] <- 1 #use both
onlyMGIV$grid_and_mg[onlyMGIV$q301_grid_use==1 & onlyMGIV$q335_mgrid_use==0] <- 2 #only grid
onlyMGIV$grid_and_mg[onlyMGIV$q301_grid_use==0 & onlyMGIV$q335_mgrid_use==1] <- 3 #only minigrid
onlyMGIV$grid_and_mg[onlyMGIV$q301_grid_use==0 & onlyMGIV$q335_mgrid_use==0] <- 4 #use neither

#tabulate grid and minigrid use
table(onlyMGIV$grid_and_mg)

#removing the 3 observations that do not get coded in grid_and_mg variable
finaldata <- onlyMGIV[!(onlyMGIV$grid_and_mg == 99),]
#this leaves us with a dataset with 541 observations and 478 variables

################################
## Descriptives about socio-economic characteristics of surveyed enterprises
###############################

##type of enterprises
table(finaldata$q208_shop_type)/length(finaldata$q208_shop_type)

## no. of employees
summary(finaldata$q209_employees)
table(finaldata$q209_employees)/length(finaldata$q209_employees)

##salaried employees
summary(finaldata$q210_salaried_employees)
table(finaldata$q210_salaried_employees)/length(finaldata$q210_salaried_employees)

##shop rented or not
summary(finaldata$q213_shop_owned)
table(finaldata$q213_shop_owned)/length(finaldata$q213_shop_owned)

#type of structure
summary(finaldata$q214_building_type)
table(finaldata$q214_building_type)/length(finaldata$q214_building_type)

##estimate of goods and furniture value
summary(finaldata$q221_shop_inventory)
table(finaldata$q221_shop_inventory)/length(finaldata$q221_shop_inventory)


#######################################
# (Fig. 1) comparing the share of mini grid users with hours of grid supply for enterprises#
########################################

#please change file path if needed
af <- read.csv("Ent_data_full.csv")

#Calculating Daily hours of supply- mean at village level:
vil_grid_hr <- group_by(af,q011_village_code) %>% summarize(mean(q308_grid_hours, na.rm =T))
vil_grid_hr$vil_grid_hr <- 
  round(vil_grid_hr$`mean(q308_grid_hours, na.rm = T)`, 1) 
vil_grid_hr$`mean(q308_grid_hours, na.rm = T) <- NULL

af <- merge(af, vil_grid_hr, by="q011_village_code", all.x=TRUE)
summary(af$vil_grid_hr) # 2 villages with no grid users - zero supply 
af$vil_grid_hr[is.na(af$vil_grid_hr)] <- 0
#in villages with no electricity - zero hours of supply
summary(af$vil_grid_hr)

adf <- af %>% filter(af$site_code==1)
summary(adf$q335_mgrid_use)

sf <-  adf %>% 
  group_by(q011_village_code) %>%
  summarise(
    ecount = n(),
    emg = length(which(q335_mgrid_use==1)),
    emg_p = round(emg*100/ecount,1),
    eg = length(which(q301_grid_use==1)),
    eg_p = round(eg*100/ecount,1),
    ehours = unique(vil_grid_hr)
  ) 
table(sf$emg_p) # 34% have no HH mini-grid users

a <- merge(rf, sf, by ="q011_village_code")
#in villages where no enterprise has grid electricty, assiging hours of supply from hh dataset:
a$ehours_new <- ifelse(a$ehours==0, a$hours, a$ehours)

##comparing hours reported by enterprises and households:
p1 <- ggplot(a, aes(a$hours, a$ehours_new))+geom_point() +
  scale_x_continuous("Average grid-electricity supply - households (hours/day)") +
  scale_y_continuous("Average grid-electricity supply - enterprises (hours/day)") +
  theme_bw()
setwd("C:/Users/Shalu/Dropbox/Rockefeller Analysis/Manuscript_1_demand/Figures")
pdf(file= "gridsupply_hh_vs_ent.pdf",  height = 4, width = 6)
p1
dev.off()

## significant difference
summary(a$ehours-a$hours)
# - enterprises typically under-reprting hours of supply, so we rely on hours reported by households to plot entrprises share of mini-grid users across villages with varying supply hours:


p <- ggplot(a, aes(a$hours, a$emg_p)) +
  geom_point( color = "pink",size=4.5) + 
  ggtitle("Enterprises")+
  scale_x_continuous("Average grid-electricity supply at village level (hours/day) ",
                     breaks = seq(4,24,4)) +
  scale_y_continuous("Share of enterprises using mini-grid (%)", limits = c(0,100)) +
  stat_smooth(method = "lm", formula = y ~ x + I(x^2), size = 1)+
  theme_bw()
p + theme(axis.title.x = element_text(size = 21), axis.title.y = element_text(size = 23),
          axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20),
          plot.title = element_text(size=23, hjust = 0.5))
p + geom_smooth(method="loess",se=TRUE)

setwd("C:/Users/Shalu/Dropbox/Rockefeller Analysis/Manuscript_1_demand/Figures")
pdf(file= "E_mg_users_vs_gridsupply.pdf",  height = 4, width = 6)
p
dev.off()

p <- ggplot(a, aes(a$hours, a$eg_p)) +
  geom_point( color = "blue") + 
  geom_smooth(method = "lm") +
  scale_x_continuous("Average grid-electricity supply at village level (hours/day) ",
                     breaks = seq(4,24,4)) +
  scale_y_continuous("Share of enterprises using grid electricity (%)") +
  theme_bw()

#####################################
## Primary Source of Electricity ####
####################################

primelec <- table(finaldata$q352_elec_primary_source)/length(finaldata$q352_elec_primary_source)
primelec
#So, 81 enterprises did not report any primary source
rownames(primelec) <- c("Grid", "Mini-Grid", "SHS", "Battery", "Diesel Gen")
barplot(as.matrix(primelec),beside=T)

###################################
## (Fig. 2) all sources of electricity used by enterprises
##################################

## create a new variable for GRID USERS
finaldata$gridusers <- 0
finaldata$gridusers[finaldata$q301_grid_use==1] <- 1 #use grid


## create a new variable for SHS USERS
finaldata$shsusers <- 0
finaldata$shsusers[finaldata$q317_shs_use==1] <- 1 #use shs


## create a new variable for BATTERY USERS
finaldata$batteryusers <- 0
finaldata$batteryusers[finaldata$q324_battery_use==1] <- 1 #use battery

## create a new variable for MINI-GRID USERS
finaldata$minigridusers <- 0
finaldata$minigridusers[finaldata$q335_mgrid_use==1] <- 1 #use mini-grid

## create a new variable for DIESEL GENSET USERS
finaldata$dieselusers <- 0
finaldata$dieselusers[finaldata$q345_dg_use] <- 1 #use diesel


#creating a new dataset for sources of electricity
elecsources <- finaldata[,c(478:483)]

#getting mean values
Grid <- mean(elecsources$gridusers)*100
SHS <- mean(elecsources$shsusers)*100
Battery <- mean(elecsources$batteryusers)*100
MiniGrid <- mean(elecsources$minigridusers)*100
Diesel <- mean(elecsources$dieselusers)*100

elecsources1 <- cbind(Grid, SHS, Battery, MiniGrid,Diesel)

#######################################################
##making a barplot for different sources of electricity
######################################################

elecsources2 <- barplot(as.matrix(elecsources1),
                       beside=T,
                       ylim = c(0,100),
                       main="Enterprises",
                       ylab = "Percent",
                       col="lightpink1",
                       font.main=1,
                       cex.axis = 1.8,
                       cex.main = 2.2,
                       cex.names = 1.8,
                       cex.lab = 1.8,
                       space = c(0,0.37))

y2 <- round(as.matrix(elecsources1),1)
text(elecsources2,y2+4,labels=as.character(y2), cex=1.6)


##########################
## (Fig. 3) MOSAIC PLOT ###########
########################

# run lines 25-47

install.packages("vcd")
library(vcd)

## create a new variable for GRID USERS
finaldata$gridusers <- 0
finaldata$gridusers[finaldata$q301_grid_use==1] <- 1 #use grid
#recoding variable
finaldata$gridusers[finaldata$gridusers==1] <- "Grid"
finaldata$gridusers[finaldata$gridusers==0] <- "No Grid"

## create a new variable for MINI-GRID USERS
finaldata$minigridusers <- 0
finaldata$minigridusers[finaldata$q335_mgrid_use==1] <- 1 #use mini-grid
#recoding variable
finaldata$minigridusers[finaldata$minigridusers==1] <- "Mini-Grid"
finaldata$minigridusers[finaldata$minigridusers==0] <- "No Mini-Grid"

structable1 <- structable(~ gridusers + minigridusers, data=finaldata)
labs <- 100*round(prop.table(structable1),2)
mosaic1 <- mosaic(structable1,
                  pop=FALSE,
                  xlab="",
                  ylab="",
                  main="Enterprises",
                  shade=TRUE,
                  gp=gpar(fill=c("lightpink1")),
                  legend=FALSE,
                  gp_labels=(gpar(fontsize=18)),
                  labelling=labeling_cells(text = labs, margin=0, gp_text=gpar(fontsize=18))(as.table(structable1)))

### some more analysis of those who do not use electricity ##
finaldata$noelec <- 0
finaldata$noelec[finaldata$gridusers=="No Grid" & finaldata$minigridusers=="No Mini-Grid"] <-1

## income based classification
table(finaldata$noelec, finaldata$q220_mon_shop_income)
summary(finaldata$q220_mon_shop_income)

##reasons for not using electricity
table(finaldata$q301_b_no_grid_reason1,finaldata$q208_shop_type)
table(finaldata$q301_b_no_grid_reason2,finaldata$q208_shop_type)
table(finaldata$q301_b_no_grid_reason3,finaldata$q208_shop_type)
table(finaldata$q301_b_no_grid_reason4,finaldata$q208_shop_type)
table(finaldata$q301_b_no_grid_reason5,finaldata$q208_shop_type)


##########################
## (Fig. 4) What kind of APPLIANCES are being used  for EACH ELECTIRCITY SOURCE ###
########################

# run lines 25-47

#I will create 3 different dummy variables - one for Lighting, one for Cooling, one for Others
#each dummy variable indicates whether the enterprise uses that particular kind of appliance or not

#Lighting dummy variable
finaldata$Lighting <- 0
collist <- c("q501_appl_used1", "q501_appl_used2","q501_appl_used3","q501_appl_used4",
             "q501_appl_used5","q501_appl_used6","q501_appl_used7","q501_appl_used8",
             "q501_appl_used9")
lighting <- c("a", "b", "c", "d") #includes cfl, led, tube, and incandescent bulb
sel <- apply(finaldata[,collist],1,function(row) any(lighting %in% row))
finaldata$Lighting[sel] <- 1

summary(finaldata$Lighting)
#mean is ~0.80 which means 80% of all enterprises use at least one of the lighting appliances

#Cooling dummy variable
finaldata$Cooling <- 0
collist <- c("q501_appl_used1", "q501_appl_used2","q501_appl_used3","q501_appl_used4",
             "q501_appl_used5","q501_appl_used6","q501_appl_used7","q501_appl_used8",
             "q501_appl_used9")
cooling <- c("f", "g", "i") #includes ceiling fan, table fan, cooler
sel <- apply(finaldata[,collist],1,function(row) any(cooling %in% row))
finaldata$Cooling[sel] <- 1

summary(finaldata$Cooling)
#mean is ~0.50 which means 50% of all enterprises use at least one of the Cooling appliances

#Others dummy variable
finaldata$Others <- 0
collist <- c("q501_appl_used1", "q501_appl_used2","q501_appl_used3","q501_appl_used4",
             "q501_appl_used5","q501_appl_used6","q501_appl_used7","q501_appl_used8",
             "q501_appl_used9")
others <- c("h", "j", "k", "l", "m","n", "o","p") #includes tv, fridge, computer, printer, weighing mc, flour mill,others, elec sewing mc
sel <- apply(finaldata[,collist],1,function(row) any(others %in% row))
finaldata$Others[sel] <- 1

summary(finaldata$Others)
#mean is ~0.33 which means 33% of all enterprises use at least one of the Others appliances

#creating a new dataset for purpose for which appliance is used
elecpurpose <- finaldata[,c(478:481)]

#creating a summary dataframe of means of purposes for which appliances are used
elecpurposestats <- aggregate(elecpurpose[,2:4], list(elecpurpose$grid_and_mg), mean)

#creating data frame for % of respondents who use appliances for a particular purpose
elecpurpose100 <- elecpurposestats[,-c(1)] * 100

#creating a dataframe for electricity source
elecpurpose_df <- data.frame(Source = elecpurposestats[,1])

#combined dataset with % of purpose for which appliance is used
finalpurpose <- cbind(elecpurpose_df, elecpurpose100)

#making bar plot

elecpurpose <- barplot(as.matrix(finalpurpose[c(2,3),-c(1)]),
                       beside=T,
                       ylim = c(0,100),
                       main="Enterprises",
                       ylab = "Percent",
                       col=c("lightpink", "lightseagreen"),
                       font.main=1,
                       cex.axis = 1.8,
                       cex.main = 2,
                       cex.names = 1.8,
                       cex.lab = 1.8,
                       width = c(0.4,0.4,0.4,0.4,0.4,0.4),
                       space = c(0,0.5))

y <- round(as.matrix(finalpurpose[c(2,3),-c(1)]))
text(elecpurpose,y-2.5,labels=as.character(y), cex=1.6)
legend("topright", legend = c("Grid", "Mini-Grid"), fill = c("lightpink", "lightseagreen"), cex=1)



#########################
## (Fig. 5 and 7) PERCEPTIONS #########
########################

# run lines 25-47

#generating new DUMMY VARIABLES for PERCEPTIONS
#Perceptions towards GRID

#Reliability of Grid
finaldata$gridreliable <- 0
finaldata$gridreliable[finaldata$q601_a_grid_reliable==3] <- 1 #agree that grid is reliable

#Adequacy of Grid
finaldata$gridadequate <- 0
finaldata$gridadequate[finaldata$q601_b_grid_adequate==3] <- 1 #agree that grid is adequate

#Quality of Grid
finaldata$gridquality <- 0
finaldata$gridquality[finaldata$q601_c_grid_quality==3] <- 1 #agree that grid is good quality

#Affordability of Grid
finaldata$gridaffordable <- 0
finaldata$gridaffordable[finaldata$q601_d_grid_affordable==3] <- 1 #agree that grid is affordable

#Easy Repair of Grid
finaldata$grideasyrepair <- 0
finaldata$grideasyrepair[finaldata$q601_f_grid_easyrepair==3] <- 1 #agree that grid is affordable

#generating new dummy variables for PERCEPTIONS
#Perceptions towards MINI-GRID

#Reliability of Mini-Grid
finaldata$mgridreliable <- 0
finaldata$mgridreliable[finaldata$q603_a_mgrid_reliable==3] <- 1 #agree that mini-grid is reliable

#Adequacy of Mini-Grid
finaldata$mgridadequate <- 0
finaldata$mgridadequate[finaldata$q603_b_mgrid_adequate==3] <- 1 #agree that mini-grid is adequate

#Quality of Mini-Grid
finaldata$mgridquality <- 0
finaldata$mgridquality[finaldata$q603_c_mgrid_quality==3] <- 1 #agree that mini-grid is good quality

#Affordability of Mini-Grid
finaldata$mgridaffordable <- 0
finaldata$mgridaffordable[finaldata$q603_d_mgrid_affordable==3] <- 1 #agree that mini-grid is affordable

#Easy Repair of Mini-Grid
finaldata$mgrideasyrepair <- 0
finaldata$mgrideasyrepair[finaldata$q603_f_mgrid_easyrepair==3] <- 1 #agree that mini-grid is affordable

####BAR GRAPHS for for PERCEPTIONS####
#first creating a perceptions dataset
perceptions <- finaldata[478:488] #change these column numbers if needed

#creating a summary dataframe of means of perceptions
perceptionstats <- aggregate(perceptions[,2:11], list(perceptions$grid_and_mg), mean)

#creating data frame for % of respondents who 'agreed'
finalpercp100 <- perceptionstats[,-c(1)] * 100

#creating a dataframe for electricity source
df <- data.frame(Source = perceptionstats[,1])

#combined dataset with % agreed perceptions
finalpercp <- cbind(df, finalpercp100)

myvarsgrid <- c("Source", "gridreliable", "gridadequate", "gridquality", "gridaffordable", "grideasyrepair")
myvarsmg  <-  c("Source", "mgridreliable", "mgridadequate", "mgridquality", "mgridaffordable", "mgrideasyrepair")

finalpercp_grid <- finalpercp[myvarsgrid]
finalpercp_mgrid <- finalpercp[myvarsmg]

#changing variable name to make them common across the 2 df above
names(finalpercp_grid)[names(finalpercp_grid) == 'gridreliable'] <- 'Reliable'
names(finalpercp_grid)[names(finalpercp_grid) == 'gridadequate'] <- 'Adequate'
names(finalpercp_grid)[names(finalpercp_grid) == 'gridquality'] <- 'Good Quality'
names(finalpercp_grid)[names(finalpercp_grid) == 'gridaffordable'] <- 'Affordable'
names(finalpercp_grid)[names(finalpercp_grid) == 'grideasyrepair'] <- 'Easy Repair'

names(finalpercp_mgrid)[names(finalpercp_mgrid) == 'mgridreliable'] <- 'Reliable'
names(finalpercp_mgrid)[names(finalpercp_mgrid) == 'mgridadequate'] <- 'Adequate'
names(finalpercp_mgrid)[names(finalpercp_mgrid) == 'mgridquality'] <- 'Good Quality'
names(finalpercp_mgrid)[names(finalpercp_mgrid) == 'mgridaffordable'] <- 'Affordable'
names(finalpercp_mgrid)[names(finalpercp_mgrid) == 'mgrideasyrepair'] <- 'Easy Repair'

#adding a new variable to the 2 df to account for Perception Towards
finalpercp_grid$Perception_Towards <- "Grid"
finalpercp_mgrid$Perception_Towards <- "Mini-Grid"

#binding two df together
finalpercp2 = rbind(finalpercp_grid, finalpercp_mgrid)
finalpercp2

#################################
#Making BAR PLOT for PERCEPTIONS
################################

#first melting the dataset
#metling dataset requires installing reshape package
install.packages("reshape")
library(reshape)
df2 <- melt(finalpercp2, id.vars=c('Perception_Towards', 'Source'))

#making bar plot using ggplot
#installing ggplot2
install.packages("ggplot2")
library(ggplot2)

#Bar Plot for Perception of Grid Users where Source == 2 
gridpercpplot <- ggplot(data = df2[(df2$Source == 2),], aes(fill=Perception_Towards, y=value, x=variable)) + 
  geom_bar(position = position_dodge(0.5), width = 0.5, stat="identity")+
  ggtitle("Enterprises")+ #title of plot
  scale_y_continuous(limits=c(0,100), breaks=seq(0,100, by = 10), expand = c(0,0))+ #setting limites to y axis
  theme_bw()+
  theme(plot.title = element_text(hjust=0.5)) + #position of title
  xlab("Attributes")+ #x label
  ylab("Agreed %") + #y label
  theme(legend.position = "none") +
  theme(plot.title = element_text(size=22))+
  theme(axis.text.y = element_text(size = 14, color="black"))+
  theme(axis.title=element_text(size=20))+
  theme(axis.text.x = element_text(size = 20, color ="black", angle=60, hjust = 1)) + #formatting x axis
  scale_fill_manual("Perception Towards", values = c("Grid" = "lightpink", "Mini-Grid" = "lightseagreen"))


gridpercpplot


#Bar Plot for Perception of Mini-Grid Users where Source == 3 
minigridpercpplot <- ggplot(data = df2[(df2$Source == 3),], aes(fill=Perception_Towards, y=value, x=variable)) + 
  geom_bar(position = position_dodge(0.5), width = 0.5, stat="identity")+
  ggtitle("Enterprises")+ #title of plot
  scale_y_continuous(limits=c(0,100), breaks=seq(0,100, by = 10), expand = c(0,0))+ #setting limites to y axis
  theme_bw()+
  theme(plot.title = element_text(hjust=0.5)) + #position of title
  xlab("Attributes")+ #x label
  ylab("Agreed %") + #y label
  theme(legend.position = "none") +
  theme(plot.title = element_text(size=22))+
  theme(axis.text.y = element_text(size = 14, color="black"))+
  theme(axis.title=element_text(size=20))+
  theme(axis.text.x = element_text(size = 20, color ="black", angle=60, hjust = 1)) + #formatting x axis
  scale_fill_manual("Perception Towards", values = c("Grid" = "lightpink", "Mini-Grid" = "lightseagreen"))

minigridpercpplot

#################################
## (Fig. 6) Time of Day WHEN APPLIANCE IS USED ##
################################

#run lines 25-47

#creating a dataset with TOD variables for all appliances
myvars <- c("grid_and_mg","q501_I_d_tod_cooler","q501_H_d_tod_tv","q501_G_d_tod_table_fan","q501_D_d_tod_tubelight",
            "q501_C_d_tod_LED","q501_B_d_tod_CFL","q501_A_d_tod_Incades_bulb","q501_J_d_tod_fridge",
            "q501_L_d_tod_computer","q501_M_d_tod_printer","q501_N_d_tod_weighingmc","q501_F_d_tod_ceiling_fan", 
            "q501_P_d_tod_others", "q501_O_d_tod_flourmill")
finaldatatod <- finaldata[myvars]

#creating key based on source and TOD when appliance is used
finaldatatod$key_incan <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_A_d_tod_Incades_bulb)
finaldatatod$key_cfl   <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_B_d_tod_CFL)
finaldatatod$key_led   <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_C_d_tod_LED)
finaldatatod$key_tube  <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_D_d_tod_tubelight)
finaldatatod$key_ceiling <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_F_d_tod_ceiling_fan)
finaldatatod$key_tablefan <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_G_d_tod_table_fan)
finaldatatod$key_tv <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_H_d_tod_tv)
finaldatatod$key_cooler <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_I_d_tod_cooler)
finaldatatod$key_fridge <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_J_d_tod_fridge)
finaldatatod$key_sewingmc <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_K_d_tod_sewingmc)
finaldatatod$key_computer <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_L_d_tod_computer)
finaldatatod$key_printer <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_M_d_tod_printer)
finaldatatod$key_weighingmc <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_N_d_tod_weighingmc)
finaldatatod$key_flourmill <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_O_d_tod_flourmill)
finaldatatod$key_others <- paste(finaldatatod$grid_and_mg, finaldatatod$q501_P_d_tod_others)

#creating dataframes
incan_df = data.frame(table(finaldatatod$key_incan))
cfl_df = data.frame(table(finaldatatod$key_cfl))
led_df = data.frame(table(finaldatatod$key_led))
tube_df = data.frame(table(finaldatatod$key_tube))
ceiling_df = data.frame(table(finaldatatod$key_ceiling))
tablefan_df = data.frame(table(finaldatatod$key_tablefan))
cooler_df = data.frame(table(finaldatatod$key_cooler))
tv_df = data.frame(table(finaldatatod$key_tv))
fridge_df = data.frame(table(finaldatatod$key_fridge))
sewingmc_df = data.frame(table(finaldatatod$key_sewingmc))
computer_df = data.frame(table(finaldatatod$key_computer))
printer_df = data.frame(table(finaldatatod$key_printer))
weighingmc_df = data.frame(table(finaldatatod$key_weighingmc))
flourmill_df = data.frame(table(finaldatatod$key_flourmill))
others_df = data.frame(table(finaldatatod$key_others))

#renaming Var 1 as Key
names(incan_df)[names(incan_df) == "Var1"] <- "Key"
names(cfl_df)[names(cfl_df) == "Var1"] <- "Key"
names(led_df)[names(led_df) == "Var1"] <- "Key"
names(tube_df)[names(tube_df) == "Var1"] <- "Key"
names(ceiling_df)[names(ceiling_df) == "Var1"] <- "Key"
names(tablefan_df)[names(tablefan_df) == "Var1"] <- "Key"
names(cooler_df)[names(cooler_df) == "Var1"] <- "Key"
names(tv_df)[names(tv_df) == "Var1"] <- "Key"
names(fridge_df)[names(fridge_df) == "Var1"] <- "Key"
names(sewingmc_df)[names(sewingmc_df) == "Var1"] <- "Key"
names(computer_df)[names(computer_df) == "Var1"] <- "Key"
names(printer_df)[names(printer_df) == "Var1"] <- "Key"
names(weighingmc_df)[names(weighingmc_df) == "Var1"] <- "Key"
names(flourmill_df)[names(flourmill_df) == "Var1"] <- "Key"
names(others_df)[names(others_df) == "Var1"] <- "Key"

#adding appliance names to the dataframes
incan_df$Appliance <- "Incandescent Bulb"
cfl_df$Appliance <- "CFL"
led_df$Appliance <- "LED"
tube_df$Appliance <- "Tubelight"
ceiling_df$Appliance <- "Ceiling Fan"
tablefan_df$Appliance <- "Table Fan"
cooler_df$Appliance <- "Cooler"
tv_df$Appliance <- "TV"
fridge_df$Appliance <- "Refrigerator"
sewingmc_df$Appliance <- "Sewing Machine"
computer_df$Appliance <- "Computer"
printer_df$Appliance <- "Printer"
weighingmc_df$Appliance <- "Weighing Machine"
flourmill_df$Appliance <- "Flour Mill"
others_df$Appliance <- "Others"

##another way (by combining few appliances together)
incan_df$Appliance1 <- "Lighting"
cfl_df$Appliance1 <- "Lighting"
led_df$Appliance1 <- "Lighting"
tube_df$Appliance1 <- "Lighting"
ceiling_df$Appliance1 <- "Cooling"
tablefan_df$Appliance1 <- "Cooling"
cooler_df$Appliance1 <- "Cooling"
tv_df$Appliance1 <- "TV"
fridge_df$Appliance1 <- "Refrigerator"
sewingmc_df$Appliance1 <- "Sewing Machine"
computer_df$Appliance1 <- "Computer"
printer_df$Appliance1 <- "Printer"
weighingmc_df$Appliance1 <- "Weighing Machine"
flourmill_df$Appliance1 <- "Flour Mill"
others_df$Appliance1 <- "Others"

#combining the separate appliance dataframes
combinedtod <-rbind(incan_df, cfl_df, led_df, tube_df,
                    ceiling_df, tablefan_df, cooler_df,
                    tv_df, fridge_df, sewingmc_df, computer_df,printer_df, weighingmc_df,flourmill_df,others_df)

#making a separate variable for source of electricity from Key variable
combinedtod$grid_and_mg <-substr(combinedtod$Key,1,1)

#making a separate variable for TOD when appliance us used from Key Variable

#need to install package 'stringi' for this
install.packages("stringi")
#for me the simple command of install packages didn't work so i used the following code
install.packages("stringi", dependencies=TRUE, INSTALL_opts = c('--no-lock'))
#loading stringi package
library(stringi)

combinedtod$TOD<-substr(combinedtod$Key,2,stri_length(combinedtod$Key))

#creating barplot for Time of the day when appliances are used for grid users
#again using ggplot2

#before creating ggplot, creating a new variable Tod
combinedtod$Tod1 <- "Not Used"
combinedtod$Tod1[combinedtod$TOD==" 1"] <- "g. Morning Only" 
combinedtod$Tod1[combinedtod$TOD==" 2"] <-  "d. Afternoon only"
combinedtod$Tod1[combinedtod$TOD==" 1 2"] <-  "e.Morning and Afternoon"
combinedtod$Tod1[combinedtod$TOD==" 1 3"] <-   "f. Morning and Evening"
combinedtod$Tod1[combinedtod$TOD==" 2 3"] <-  "b. After and Evening"
combinedtod$Tod1[combinedtod$TOD==" 3"] <-  "c. Evening Only"
combinedtod$Tod1[combinedtod$TOD==" 1 2 3"] <- "a. Morning Afternoon Evening"

#now creating BARPLOTS for TOD when Appliance is used
#now creating ggplot for GRID USERS
gridtod <- ggplot(data = combinedtod[(combinedtod$grid_and_mg == "2")&!(is.na(combinedtod$TOD) | combinedtod$TOD==" "),], 
                  aes(x = factor(Appliance, levels = c("Incandescent Bulb", "CFL", "LED", "Tubelight", "Ceiling Fan", 
                                                       "Table Fan","Cooler", "TV", "Refrigerator", "Sewing Machine",
                                                       "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                      y = Freq, fill =Tod1)) + 
  geom_bar(stat="identity") +
  ggtitle("Time of the day when appliances are used by grid users")+ #title of plot
  scale_y_continuous(limits=c(0,150),breaks=seq(0,150, by = 25),expand = c(0,0))+
  theme_bw()+
  xlab("Appliances")+
  ylab("Count")+
  theme(plot.title = element_text(size=11))+
  theme(legend.title = element_text(colour="black", size=9)) +
  theme(axis.title=element_text(size=11))+
  theme(axis.text.x = element_text(size = 9, color ="black")) + #formatting x axis
  scale_fill_manual("TOD",labels = c("5am-11am", "11am-5pm", "5am-5pm", "5am-11am & 5pm-11pm",
                                     "11am-11pm", "5pm-11pm", "5am-11pm"),
                    values = c("#8DA0CB","#FC8D62", "#A6D854", "#FFD92F","#E5C494","lightpink1", 
                               "#66C2A5"))+ 
  
  coord_flip()  

gridtod

##now for when lighting and cooling are combined and not including a few time variables
#removing time variables when TOD = 1 or TOD = 1 2 or TOD=1 3
combinedtod1 <- combinedtod[combinedtod$Tod1=="a. Morning Afternoon Evening"|combinedtod$Tod1=="b. After and Evening"|
                              combinedtod$Tod1=="c. Evening Only"|combinedtod$Tod1=="d. Afternoon only",]

gridtod1 <- ggplot(data = combinedtod1[(combinedtod1$grid_and_mg == "2")&!(is.na(combinedtod1$Tod1) | combinedtod1$Tod1==" "),], 
                   aes(x = factor(Appliance1, levels = c("Lighting", "Cooling", "TV", "Refrigerator", "Sewing Machine",
                                                         "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                       y = Freq, fill =Tod1)) + 
  geom_bar(stat="identity") +
  ggtitle("Time of the day when appliances are used by grid users")+ #title of plot
  scale_y_continuous(limits=c(0,150),breaks=seq(0,150, by = 25),expand = c(0,0))+
  theme_bw()+
  xlab("Appliances")+
  ylab("Count")+
  theme(plot.title = element_text(size=11))+
  theme(legend.title = element_text(colour="black", size=9)) +
  theme(axis.title=element_text(size=11))+
  theme(axis.text.x = element_text(size = 9, color ="black")) + #formatting x axis
  scale_fill_manual("TOD",labels = c("11am-5pm",
                                     "11am-11pm", "5pm-11pm", "5am-11pm"),
                    values = c("#276419","#4D9221", "#7FBC41", 
                               "#B8E186"))+ 
  
  coord_flip()  

gridtod1

#Appliance Use in % format
gridtodprop <- ggplot(data = combinedtod[(combinedtod$grid_and_mg == "2")&!(is.na(combinedtod$TOD) | combinedtod$TOD==" "),], 
                      aes(x = factor(Appliance, levels = c("Incandescent Bulb", "CFL", "LED", "Tubelight", "Ceiling Fan", 
                                                           "Table Fan","Cooler", "TV", "Refrigerator", "Sewing Machine",
                                                           "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                          y = Freq, fill =Tod1)) + 
  geom_bar(position = "fill", stat="identity") +
  ggtitle("Time of the day when appliances are used by grid users")+ #title of plot
  scale_y_continuous(expand = c(0,0), labels = scales::percent)+
  theme_bw()+
  xlab("Appliances")+
  ylab("Percent")+
  theme(plot.title = element_text(size=11))+
  theme(legend.title = element_text(colour="black", size=9)) +
  theme(axis.title=element_text(size=10))+
  theme(axis.text.x = element_text(size = 9, color ="black"))  #formatting x axis
scale_fill_manual("TOD",labels = c("5am-11am", "11am-5pm", "5am-5pm", "5am-11am & 5pm-11pm",
                                   "11am-11pm", "5pm-11pm", "5am-11pm"),
                  values = c("#8DA0CB","#FC8D62", "#A6D854", "#FFD92F","#E5C494","lightpink1", 
                             "#66C2A5"))+ 
  
  coord_flip()  

gridtodprop

######################
### appliance use % format
######################
gridtod1prop <- ggplot(data = combinedtod1[(combinedtod1$grid_and_mg == "2")&!(is.na(combinedtod1$Tod1) | combinedtod1$Tod1==" "),], 
                       aes(x = factor(Appliance1, levels = c("Lighting", "Cooling", "TV", "Refrigerator", "Sewing Machine",
                                                             "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                           y = Freq, fill =Tod1)) + 
  geom_bar(position = "fill", stat="identity") +
  ggtitle("Time of the day when appliances are used by grid users")+ #title of plot
  scale_y_continuous(expand = c(0,0), labels = scales::percent)+
  theme_bw()+
  xlab("Appliances")+
  ylab("Percent")+
  theme(plot.title = element_text(size=15))+
  theme(legend.title = element_text(colour="black", size=10)) +
  theme(axis.title=element_text(size=13))+
  theme(axis.text = element_text(size = 13, color ="black"))  #formatting x axis
scale_fill_manual("TOD",labels = c("11am-5pm",
                                   "11am-11pm", "5pm-11pm", "5am-11pm"),
                  values = c("#FC8D62","#E5C494", "lightpink1", 
                             "#66C2A5"))+  
  
  coord_flip()  

gridtod1prop

########################
## (code for figure 6) appliance use % format minigrid ## 
########################

mgridtod1prop <- ggplot(data = combinedtod1[(combinedtod1$grid_and_mg == "3")&!(is.na(combinedtod1$Tod1) | combinedtod1$Tod1==" "),], 
                        aes(x = factor(Appliance1, levels = c("Lighting", "Cooling", "TV", "Refrigerator", "Sewing Machine",
                                                              "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                            y = Freq, fill =Tod1)) + 
  geom_bar(position = "fill", stat="identity") +
  ggtitle("Enterprises")+ #title of plot
  scale_y_continuous(expand = c(0,0), labels = scales::percent)+
  theme_bw()+
  xlab("Appliances")+
  ylab("Percent")+
  theme(plot.title = element_text(size=20,hjust = 0.5))+
  theme(legend.title = element_text(colour="black", size=18)) +
  theme(legend.text = element_text(size = 10))+
  theme(axis.title=element_text(size=18))+
  theme(axis.text = element_text(size = 18, color ="black"))+ #formatting x axis
  scale_fill_manual("TOD",labels = c("Throughout the day",
                                     "Afternoon and/ evening", 
                                     "Evening only",
                                     "Afternoon only"),
                    values = c("#7e88ba","#9fb2c3","#aed886","#f2e880"))+
  coord_flip()  

mgridtod1prop


##############################
## Expenditure on Electricity ##
##############################

## GRID ##

## Initial Expenditure on Grid ##
#summary of initial cost for grid connection
initialcostgrid <- summary(finaldata$q303_grid_initial_cost)

##### INITIAL COST BARPLOT
costboxplot <- data.frame(finaldata$q303_grid_initial_cost,finaldata$q337_mgrid_initial_cost)
names(costboxplot) <- c("Grid", "Mini-Grid")
par(font="plain")
boxplot(costboxplot$Grid,costboxplot$`Mini-Grid`
        names=c("Grid", "Mini-Grid"))
        
title(main="Initial cost of getting grid and mini-grid connections",
      ylab="Rupees",
      font.main=1,
      cex.axis=1.5)
       
initialcostgrid
#Mean cost is 1895 INR, and median is 1500 INR

## MONTHLY COST OF GRID CONNECTION ##

#variable bill for grid-based electricity
table(finaldata$q306_c_grid_bill_amt1)
table(finaldata$q306_d_grid_bill_units)
#very few households reported latest, variable bill. 

#creating a new column for monthly bill for grid-based electricity based on q307 that asks for average bill amount
finaldata$gridmonthlybill <- " "
finaldata$gridmonthlybill <- finaldata$q307_grid_bill_amt2/finaldata$q307_grid_bill_months2

#summary of monthly bill for grid-based electricity
monthlygridbill <- summary(finaldata$gridmonthlybill)
sd(finaldata$gridmonthlybill, na.rm=TRUE)

monthlygridbill
#average monthly bill is ~630 INR, median bill is 350; standard deviation is 1414.482 INR
#count is 541-393 = 148

#average hours power received
summary(finaldata$q308_grid_hours)

## MINI GRID ##

## Initial Expenditure on Mini-Grid ##
#summary of initial cost for grid connection
initialcostminigrid <- summary(finaldata$q337_mgrid_initial_cost)

initialcostminigrid
#Mean cost is ~670 INR, and median is 345 INR


## MONTHLY COST OF GRID CONNECTION ##

#variable bill for mini-grid electricity
table(finaldata$q340_c_mgrid_bill_amt1)
table(finaldata$q340_d_mgrid_bill_units)
#very few households reported latest, variable bill. 

#summary of monthly bill for mini-grid
monthlyminigridbill <- summary(finaldata$q340_e_mgrid_bill_amt2)

monthlyminigridbill
sd(finaldata$q340_e_mgrid_bill_amt2, na.rm=TRUE)

#average monthly bill is ~289 rupees; sd is ~385 INR
#count is 541-364 = 177

##average hours power received
summary(finaldata$q341_mgrid_hours)

#####################################
# (Table 3 and 4) Cost based on purpose of electricity use
#######################################
finaldatacost$con <-paste(finaldatacost$q501_appl_used1,finaldatacost$q501_appl_used2,finaldatacost$q501_appl_used3,finaldatacost$q501_appl_used4,finaldatacost$q501_appl_used5,finaldatacost$q501_appl_used6,finaldatacost$q501_appl_used7,finaldatacost$q501_appl_used8,finaldatacost$q501_appl_used9)
finaldatacost$con1 <-gsub("\\s", "", finaldatacost$con)
install.packages("stringr")
library(stringr)

library(stringi)
set.seed(123L)
value <- stri_rand_strings(10000, ceiling(runif(10000, 1, 100))) # 10000 random ASCII strings
head(value)

HP<-'o|p'
MP<-'h|j|k|l|m|n'
Light<-'a|b|c|d'
Cool<-'f|g|i'
d<-"c"
stri_detect_fixed(d,HP)
any(stri_detect_fixed(d,Cool))

finaldatacost$HP <- str_detect(finaldatacost$con1,HP)
finaldatacost$MP <- str_detect(finaldatacost$con1,MP)
finaldatacost$Light <- str_detect(finaldatacost$con1,Light)
finaldatacost$Cool <- str_detect(finaldatacost$con1,Cool)

finaldatacost$app_grp <-"99. Others"
finaldatacost$app_grp[finaldatacost$Light==TRUE & finaldatacost$Cool==FALSE & finaldatacost$MP==FALSE & finaldatacost$HP==FALSE] <- "01. Light Only"
finaldatacost$app_grp[finaldatacost$Light==TRUE & finaldatacost$Cool==TRUE & finaldatacost$MP==FALSE & finaldatacost$HP==FALSE] <- "02. Light and Cooling Only"
finaldatacost$app_grp[finaldatacost$MP==TRUE] <- "03. Medium Powered Only"
finaldatacost$app_grp[finaldatacost$HP==TRUE] <- "04. High Powered Only"

agg <- aggregate(list(finaldatacost$q303_grid_initial_cost,finaldatacost$gridmonthlybill), by=list(finaldatacost$app_grp,finaldatacost$grid_and_mg), FUN=mean,na.rm=TRUE)    
names(agg)[1:4] <- c("Appliance","Grid Type","Avg Initial Cost Grid","Avg Monthly Bill Grid") 

agg1 <- aggregate(list(finaldatacost$q337_mgrid_initial_cost,finaldatacost$q340_e_mgrid_bill_amt2), by=list(finaldatacost$app_grp,finaldatacost$grid_and_mg), FUN=mean,na.rm=TRUE)    
names(agg1)[1:4] <- c("Appliance","Grid Type","Avg Initial Cost MGrid","Avg Monthly Bill MGrid") 


##combining cooling and medium powered
HP<-'o|p'
Cool<-'h|j|k|l|m|n|f|g|i'
Light<-'a|b|c|d'
d<-"c"
stri_detect_fixed(d,HP)
any(stri_detect_fixed(d,Cool))

finaldatacost$HP <- str_detect(finaldatacost$con1,HP)
finaldatacost$Cool <- str_detect(finaldatacost$con1,Cool)
finaldatacost$Light <- str_detect(finaldatacost$con1,Light)


finaldatacost$app_grp <-"99. Others"
finaldatacost$app_grp[finaldatacost$Light==TRUE & finaldatacost$Cool==FALSE & finaldatacost$HP==FALSE] <- "01. Light Only"
finaldatacost$app_grp[finaldatacost$Light==TRUE & finaldatacost$Cool==TRUE & finaldatacost$HP==FALSE] <- "02. Cooling and Others"
finaldatacost$app_grp[finaldatacost$HP==TRUE] <- "04. High Powered Only"

agg <- aggregate(list(finaldatacost$q303_grid_initial_cost,finaldatacost$gridmonthlybill), by=list(finaldatacost$app_grp,finaldatacost$grid_and_mg), FUN=mean,na.rm=TRUE)    
names(agg)[1:4] <- c("Appliance","Grid Type","Avg Initial Cost Grid","Avg Monthly Bill Grid") 

agg1 <- aggregate(list(finaldatacost$q337_mgrid_initial_cost,finaldatacost$q340_e_mgrid_bill_amt2), by=list(finaldatacost$app_grp,finaldatacost$grid_and_mg), FUN=mean,na.rm=TRUE)    
names(agg1)[1:4] <- c("Appliance","Grid Type","Avg Initial Cost MGrid","Avg Monthly Bill MGrid") 

################################
##reasons for not using minigrid (Figure 8)
###############################

##plot based on CH 5 shared by Shalu

dfnomgrid <- data.frame("UseAlternative"=60,
                        "DontNeedElec"=13,
                        "CantAfford"=36,
                        "NoConnection"=5,
                        "Unreliable"=5,
                        stringsAsFactors = FALSE)

nomgridplot <- barplot(as.matrix(dfnomgrid),
                       beside=T,
                       ylim = c(0,100),
                       main="Enterprises",
                       ylab = "Percent",
                       col="lightpink1",
                       font.main=1,
                       cex.axis = 1.6,
                       cex.main = 1.8,
                       cex.names = 1.2,
                       cex.lab = 1.8,
                       space = c(0,0.4))

y23 <- round(as.matrix(dfnomgrid),1)
text(nomgridplot,y23+3.7,labels=as.character(y23), cex=1.5)



###################################################
########## Household Analysis #####################
##################################################


##loading data
setwd("~/Dropbox/Rockefeller Analysis/Manuscript_Anjali/R Studio")
data <- read.csv("HH_data_full.csv")

#10249 observations with 540 variables

#summarizing site codes
site_codeshh <- table(data$site_code)
#view site_codes
site_codeshh

#keeping only site_code 1 villages as analysis limited only to villages with MG intervention
onlyMGIVhh <- data[data$site_code == 1,]

#creating new categorical variable based on grid and mg use in households
onlyMGIVhh$grid_and_mghh <- 99
onlyMGIVhh$grid_and_mghh[onlyMGIVhh$q301_grid_use==1 & onlyMGIVhh$q335_mgrid_use==1] <- 1 #use both
onlyMGIVhh$grid_and_mghh[onlyMGIVhh$q301_grid_use==1 & onlyMGIVhh$q335_mgrid_use==0] <- 2 #only grid
onlyMGIVhh$grid_and_mghh[onlyMGIVhh$q301_grid_use==0 & onlyMGIVhh$q335_mgrid_use==1] <- 3 #only minigrid
onlyMGIVhh$grid_and_mghh[onlyMGIVhh$q301_grid_use==0 & onlyMGIVhh$q335_mgrid_use==0] <- 4 #use neither

#tabulate grid and minigrid use in households
table(onlyMGIVhh$grid_and_mghh)

#removing 62 observations that did not get coded in the grid_and_mghh variable
finaldatahh <- onlyMGIVhh[!(onlyMGIVhh$grid_and_mghh == 99),]
#this leaves us with 2648 observations and 541 variables

################################
## Descriptives about socio-economic characteristics of surveyed households
##############################
summary(finaldatahh$q208_edu)
table(finaldatahh$q208_edu)
#1027 out of 2648 households had illiterate head of household

table(finaldatahh$q209_religion)
#2128 households hindu (80%), remaining muslim

table(finaldatahh$q210_caste)
#1537 (58%) belonged to OBC, followed by SC households (22.5%), and then general category households (19%)

table(finaldatahh$q211_ration_card)
table(finaldatahh$q210_caste,finaldatahh$q211_ration_card)
#28% do not have ration card, majority (70%) get covered under the priority list as per NFSA of 2013.

table(finaldatahh$q212_income_source)
#42% engaged in non-agriculture labor; 26% agri on own land, ~17% self-employed or engaged in business
toccuhh <- table(finaldatahh$q212_income_source, finaldatahh$q210_caste)
prop.table(toccuhh,1)

summary(finaldatahh$q215_annual_income)
summary(finaldatahh$q216_a_hh_debt)
#30\% reported debt

############################################
## (Fig 1) Hours of grid supply vs minig-grid users
#############################################

#df - has HH dataset
df <- read.csv("HH_data_full.csv")

#Calculating Daily hours of supply- mean at village level:
vil_grid_hr <- group_by(df,q011_village_code) %>% summarize(mean(q308_grid_hours, na.rm =T))
vil_grid_hr$vil_grid_hr <- 
  round(vil_grid_hr$`mean(q308_grid_hours, na.rm = T)`, 1) 
vil_grid_hr$`mean(q308_grid_hours, na.rm = T) <- NULL

df <- merge(df, vil_grid_hr, by="q011_village_code", all.x=TRUE)
summary(df$vil_grid_hr) # 2 villages with no grid users - zero supply 
df$vil_grid_hr[is.na(df$vil_grid_hr)] <- 0
#in villages with no electricity - zero hours of supply

adf <- df %>% filter(df$site_code==1)
summary(adf$q335_mgrid_use)

rf <-  adf %>% 
  group_by(q011_village_code) %>%
  summarise(
    count = n(),
    mg = length(which(q335_mgrid_use==1)),
    mg_p = round(mg*100/count,1),
    g = length(which(q301_grid_use==1)),
    g_p = round(g*100/count,1),
    hours = unique(vil_grid_hr)
  )
table(rf$mg_p) # 34% have no HH mini-grid users

p <- ggplot(rf, aes(rf$hours, rf$mg_p)) +
  geom_point( color = "lightpink",size=4.5) + 
  ggtitle("Households")+
  scale_x_continuous("Average grid-electricity supply at village level (hours/day) ",
                     breaks = seq(4,24,4)) +
  scale_y_continuous("Share of households using mini-grid (%)", limits = c(0,100)) +
  stat_smooth(method = "lm", formula = y ~ x + I(x^2), size = 1)+
  theme_bw()
p + theme(axis.title.x = element_text(size = 21), axis.title.y = element_text(size = 23),
          axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20),
          plot.title = element_text(size=23, hjust = 0.5))
p + geom_smooth(method="loess",se=TRUE)

setwd("C:/Users/Shalu/Dropbox/Rockefeller Analysis/Manuscript_1_demand/Figures")
pdf(file= "mg_users_vs_gridsupply.pdf",  height = 4, width = 6)
p
dev.off()

p <- ggplot(rf, aes(rf$hours, rf$g_p)) +
  geom_point( color = "lightpink") + 
  scale_x_continuous("Average grid-electricity supply at village level (hours/day) ",
                     breaks = seq(4,24,4)) +
  scale_y_continuous("Share of households using grid electricity (%)") +
  theme_bw()

setwd("C:/Users/Shalu/Dropbox/Rockefeller Analysis/Manuscript_1_demand/Figures")
pdf(file= "g_users_vs_gridsupply.pdf",  height = 4, width = 6)
p
dev.off()


#####################################
## Primary Source of Electricity ####
####################################
table(finaldatahh$q352_elec_primary_source)
#So, 532 hh did not report any primary source
primelechh <- table(finaldatahh$q352_elec_primary_source)/length(finaldatahh$q352_elec_primary_source)
primelechh
# primelechh provides proportion of primary elec source
rownames(primelechh) <- c("NA", "Grid", "Mini-Grid", "SHS", "Battery", "Diesel Gen")
barplot(as.matrix(primelechh),beside=T)

###################################
## (Fig. 2) all sources of electricity used by households
##################################
## create a new variable for GRID USERS
finaldatahh$gridusershh <- 0
finaldatahh$gridusershh[finaldatahh$q301_grid_use==1] <- 1 #use grid


## create a new variable for SHS USERS
finaldatahh$shsusershh <- 0
finaldatahh$shsusershh[finaldatahh$q317_shs_use==1] <- 1 #use shs


## create a new variable for BATTERY USERS
finaldatahh$batteryusershh <- 0
finaldatahh$batteryusershh[finaldatahh$q324_battery_use==1] <- 1 #use battery

## create a new variable for MINI-GRID USERS
finaldatahh$minigridusershh <- 0
finaldatahh$minigridusershh[finaldatahh$q335_mgrid_use==1] <- 1 #use mini-grid

## create a new variable for DIESEL GENSET USERS
finaldatahh$dieselusershh <- 0
finaldatahh$dieselusershh[finaldatahh$q345_dg_use] <- 1 #use diesel


#creating a new dataset for sources of electricity
elecsourceshh <- finaldatahh[,c(541:546)]

#getting mean values
Grid <- mean(elecsourceshh$gridusershh)*100
SHS <- mean(elecsourceshh$shsusershh)*100
Battery <- mean(elecsourceshh$batteryusershh)*100
MiniGrid <- mean(elecsourceshh$minigridusershh)*100
Diesel <- mean(elecsourceshh$dieselusershh)*100

elecsources1hh <- cbind(Grid, SHS, Battery, MiniGrid,Diesel)

##making a barplot


elecsources2hh <- barplot(as.matrix(elecsources1hh),
                        beside=T,
                        ylim = c(0,100),
                        main="Households",
                        ylab = "Percent",
                        col="lightpink1",
                        font.main=1,
                        cex.axis = 1.8,
                        cex.main = 2.2,
                        cex.names = 1.8,
                        cex.lab = 1.8,
                        space = c(0,0.37))

y2 <- round(as.matrix(elecsources1hh),1)
text(elecsources2hh,y2+4,labels=as.character(y2), cex=1.6)

##########################
## (Fig. 3) MOSAIC PLOT ###########
########################
install.packages("vcd")
library(vcd)

## create a new variable for GRID USERS
finaldatahh$gridusershh <- 0
finaldatahh$gridusershh[finaldatahh$q301_grid_use==1] <- 1 #use grid
#recoding variable
finaldatahh$gridusershh[finaldatahh$gridusershh==1] <- "Grid"
finaldatahh$gridusershh[finaldatahh$gridusershh==0] <- "No Grid"

## create a new variable for MINI-GRID USERS
finaldatahh$minigridusershh <- 0
finaldatahh$minigridusershh[finaldatahh$q335_mgrid_use==1] <- 1 #use mini-grid
#recoding variable
finaldatahh$minigridusershh[finaldatahh$minigridusershh==1] <- "Mini-Grid"
finaldatahh$minigridusershh[finaldatahh$minigridusershh==0] <- "No Mini-Grid"

structable1hh <- structable(~gridusershh + minigridusershh, data=finaldatahh)
labshh <- 100*round(prop.table(structable1hh),2)
mosaic1hh <- mosaic(structable1hh,
                  pop=FALSE,
                  main="Households",
                  shade=TRUE,
                  gp=gpar(fill=c("lightpink1")),
                  gp_labels=(gpar(fontsize=18)),
                  legend=FALSE,
                  labelling=labeling_cells(text = labshh, margin=0, gp_text=gpar(fontsize=18))(as.table(structable1hh)))
                 


##########################
## (Fig. 4) What kind of APPLIANCES are being used  for EACH ELECTIRCITY SOURCE ###
########################

#I will create 3 different dummy variables - one for Lighting, one for Cooling, one for Others
#each dummy variable indicates whether the enterprise uses that particular kind of appliance or not

#Lighting dummy variable
finaldatahh$Lighting <- 0
collist <- c("q501_appl_used1", "q501_appl_used2","q501_appl_used3","q501_appl_used4",
             "q501_appl_used5","q501_appl_used6","q501_appl_used7","q501_appl_used8",
             "q501_appl_used9")
lightinghh <- c("a", "b", "c", "d") #includes cfl, led, tube, and incandescent bulb
sel <- apply(finaldatahh[,collist],1,function(row) any(lightinghh %in% row))
finaldatahh$Lighting[sel] <- 1

summary(finaldatahh$Lighting)
#mean is ~0.65 which means 65% of all households use at least one of the lighting appliances

#Cooling dummy variable
finaldatahh$Cooling <- 0
collist <- c("q501_appl_used1", "q501_appl_used2","q501_appl_used3","q501_appl_used4",
             "q501_appl_used5","q501_appl_used6","q501_appl_used7","q501_appl_used8",
             "q501_appl_used9")
coolinghh <- c("f", "g", "i") #includes ceiling fan, table fan, cooler
sel <- apply(finaldatahh[,collist],1,function(row) any(coolinghh %in% row))
finaldatahh$Cooling[sel] <- 1

summary(finaldatahh$Cooling)
#mean is ~0.68 which means 68% of all households use at least one of the Cooling appliances

#Others dummy variable
finaldatahh$Others <- 0
collist <- c("q501_appl_used1", "q501_appl_used2","q501_appl_used3","q501_appl_used4",
             "q501_appl_used5","q501_appl_used6","q501_appl_used7","q501_appl_used8",
             "q501_appl_used9")
othershh <- c("h", "j", "k", "l", "m","n", "o","p") #includes tv, fridge, computer, printer, weighing mc, flour mill,others, elec sewing mc
sel <- apply(finaldatahh[,collist],1,function(row) any(othershh %in% row))
finaldatahh$Others[sel] <- 1

summary(finaldatahh$Others)
#mean is ~0.26 which means 26% of all households use at least one of the Others appliances

#creating a new dataset for purpose for which appliance is used
elecpurposehh <- finaldatahh[,c(541:544)]

#creating a summary dataframe of means of purposes for which appliances are used
elecpurposestatshh <- aggregate(elecpurposehh[,2:4], list(elecpurposehh$grid_and_mghh), mean)

#creating data frame for % of respondents who use appliances for a particular purpose
elecpurpose100hh <- elecpurposestatshh[,-c(1)] * 100

#creating a dataframe for electricity source
elecpurposehh_df <- data.frame(Source = elecpurposestatshh[,1])

#combined dataset with % of purpose for which appliance is used
finalpurposehh <- cbind(elecpurposehh_df, elecpurpose100hh)

#making bar plot

elecpurposehh <- barplot(as.matrix(finalpurposehh[c(2,3),-c(1)]),
                       beside=T,
                       ylim = c(0,100),
                       main="Households",
                       font.main=1,
                       ylab = "Percent",
                       col=c("lightpink", "lightseagreen"),
                       font.main=1,
                       cex.axis = 1.8,
                       cex.main = 2,
                       cex.names = 1.8,
                       cex.lab = 1.8,
                       width = c(0.4,0.4,0.4,0.4,0.4,0.4),
                       space = c(0,0.5))
                      
yhh <- round(as.matrix(finalpurposehh[c(2,3),-c(1)]))
text(elecpurposehh,yhh-2.5,labels=as.character(yhh), cex=1.6)
legend("topright", legend = c("Grid", "MG"), fill = c("lightpink", "lightseagreen"), cex=1.3)

##################################################
### (Fig. 5 and 7) Perceptions ##################################
#generating new DUMMY VARIABLES for PERCEPTIONS
#Perceptions of households towards GRID

#Reliability of Grid
finaldatahh$gridreliablehh <- 0
finaldatahh$gridreliablehh[finaldatahh$q601_a_grid_reliable==3] <- 1 #agree that grid is reliable

#Adequacy of Grid
finaldatahh$gridadequatehh <- 0
finaldatahh$gridadequatehh[finaldatahh$q601_b_grid_adequate==3] <- 1 #agree that grid is adequate

#Quality of Grid
finaldatahh$gridqualityhh <- 0
finaldatahh$gridqualityhh[finaldatahh$q601_c_grid_quality==3] <- 1 #agree that grid is good quality

#Affordability of Grid
finaldatahh$gridaffordablehh <- 0
finaldatahh$gridaffordablehh[finaldatahh$q601_d_grid_affordable==3] <- 1 #agree that grid is affordable

#Easy Repair of Grid
finaldatahh$grideasyrepairhh <- 0
finaldatahh$grideasyrepairhh[finaldatahh$q601_f_grid_easyrepair==3] <- 1 #agree that grid is affordable

#generating new dummy variables for PERCEPTIONS
#Perceptions towards MINI-GRID

#Reliability of Mini-Grid
finaldatahh$mgridreliablehh <- 0
finaldatahh$mgridreliablehh[finaldatahh$q603_a_mgrid_reliable==3] <- 1 #agree that mini-grid is reliable

#Adequacy of Mini-Grid
finaldatahh$mgridadequatehh <- 0
finaldatahh$mgridadequatehh[finaldatahh$q603_b_mgrid_adequate==3] <- 1 #agree that mini-grid is adequate

#Quality of Mini-Grid
finaldatahh$mgridqualityhh <- 0
finaldatahh$mgridqualityhh[finaldatahh$q603_c_mgrid_quality==3] <- 1 #agree that mini-grid is good quality

#Affordability of Mini-Grid
finaldatahh$mgridaffordablehh <- 0
finaldatahh$mgridaffordablehh[finaldatahh$q603_d_mgrid_affordable==3] <- 1 #agree that mini-grid is affordable

#Easy Repair of Mini-Grid
finaldatahh$mgrideasyrepairhh <- 0
finaldatahh$mgrideasyrepairhh[finaldatahh$q603_f_mgrid_easyrepair==3] <- 1 #agree that mini-grid is affordable

####BAR GRAPHS for for PERCEPTIONS####
#first creating a perceptions dataset
perceptionshh <- finaldatahh[541:551]

#creating a summary dataframe of means of perceptions
perceptionstatshh <- aggregate(perceptionshh[,2:11], list(perceptionshh$grid_and_mghh), mean)

#creating data frame for % of respondents who 'agreed'
finalpercphh100 <- perceptionstatshh[,-c(1)] * 100

#creating a dataframe for electricity source
dfhh <- data.frame(Source = perceptionstatshh[,1])

#combined dataset with % agreed perceptions
finalpercphh <- cbind(dfhh, finalpercphh100)

myvarsgridhh <- c("Source", "gridreliablehh", "gridadequatehh", "gridqualityhh", "gridaffordablehh", "grideasyrepairhh")
myvarsmghh  <-  c("Source", "mgridreliablehh", "mgridadequatehh", "mgridqualityhh", "mgridaffordablehh", "mgrideasyrepairhh")

finalpercp_gridhh <- finalpercphh[myvarsgridhh]
finalpercp_mgridhh <- finalpercphh[myvarsmghh]

#changing variable name to make them common across the 2 df above
names(finalpercp_gridhh)[names(finalpercp_gridhh) == 'gridreliablehh'] <- 'Reliable'
names(finalpercp_gridhh)[names(finalpercp_gridhh) == 'gridadequatehh'] <- 'Adequate'
names(finalpercp_gridhh)[names(finalpercp_gridhh) == 'gridqualityhh'] <- 'Good Quality'
names(finalpercp_gridhh)[names(finalpercp_gridhh) == 'gridaffordablehh'] <- 'Affordable'
names(finalpercp_gridhh)[names(finalpercp_gridhh) == 'grideasyrepairhh'] <- 'Easy Repair'

names(finalpercp_mgridhh)[names(finalpercp_mgridhh) == 'mgridreliablehh'] <- 'Reliable'
names(finalpercp_mgridhh)[names(finalpercp_mgridhh) == 'mgridadequatehh'] <- 'Adequate'
names(finalpercp_mgridhh)[names(finalpercp_mgridhh) == 'mgridqualityhh'] <- 'Good Quality'
names(finalpercp_mgridhh)[names(finalpercp_mgridhh) == 'mgridaffordablehh'] <- 'Affordable'
names(finalpercp_mgridhh)[names(finalpercp_mgridhh) == 'mgrideasyrepairhh'] <- 'Easy Repair'

#adding a new variable to the 2 df to account for Perception Towards
finalpercp_gridhh$Perception_Towards <- "Grid"
finalpercp_mgridhh$Perception_Towards <- "Mini-Grid"

#binding two df together
finalpercphh2 = rbind(finalpercp_gridhh, finalpercp_mgridhh)
finalpercphh2

#################################
#Making BAR PLOT for PERCEPTIONS
################################

#first melting the dataset
#metling dataset requires installing reshape package
install.packages("reshape")
library(reshape)
dfhh2 <- melt(finalpercphh2, id.vars=c('Perception_Towards', 'Source'))

#making bar plot using ggplot
#installing ggplot2
install.packages("ggplot2")
library(ggplot2)

#Bar Plot for Perception of Grid Users where Source == 2 implies Grid Only Users
gridpercpplothh <- ggplot(data = dfhh2[(dfhh2$Source == 2),], aes(fill=Perception_Towards, y=value, x=variable)) + 
  geom_bar(position = position_dodge(0.5), width = 0.5, stat="identity")+
  ggtitle("Households")+ #title of plot
  scale_y_continuous(limits=c(0,100), breaks=seq(0,100, by = 10), expand = c(0,0))+ #setting limites to y axis
  theme_bw()+
  theme(plot.title = element_text(hjust=0.5)) + #position of title
  xlab("Attributes")+ #x label
  ylab("Agreed %") + #y label
  theme(legend.title = element_text(colour="black", size=14)) +
  theme(legend.text=element_text(size=14))+
  theme(plot.title = element_text(size=22))+
  theme(axis.text.y = element_text(size = 14, color="black"))+
  theme(axis.title=element_text(size=20))+
  theme(axis.text.x = element_text(size = 20, color ="black", angle=60, hjust = 1)) + #formatting x axis
  scale_fill_manual("Perception Towards", values = c("Grid" = "lightpink", "Mini-Grid" = "lightseagreen"))


gridpercpplothh


#Bar Plot for Perception of Mini-Grid Users where Source == 3 implies Grid Only Users
minigridpercpplothh <- ggplot(data = dfhh2[(dfhh2$Source == 3),], aes(fill=Perception_Towards, y=value, x=variable)) + 
  geom_bar(position = position_dodge(0.5), width = 0.5, stat="identity")+
  ggtitle("Households")+ #title of plot
  scale_y_continuous(limits=c(0,100), breaks=seq(0,100, by = 10), expand = c(0,0))+ #setting limites to y axis
  theme_bw()+
  theme(plot.title = element_text(hjust=0.5)) + #position of title
  xlab("Attributes")+ #x label
  ylab("Agreed %") + #y label
  theme(legend.title = element_text(colour="black", size=14)) +
  theme(legend.text=element_text(size=14))+
  theme(plot.title = element_text(size=22))+
  theme(axis.text.y = element_text(size = 14, color="black"))+
  theme(axis.title=element_text(size=20))+
  theme(axis.text.x = element_text(size = 20, color ="black", angle=60, hjust = 1)) + #formatting x axis
  scale_fill_manual("Perception Towards", values = c("Grid" = "lightpink", "Mini-Grid" = "lightseagreen"))

minigridpercpplothh

##################################################
###  (Fig. 6) TOD when appliance is used ##################
## TOD WHEN APPLIANCE IS USED ##
################################
#creating a dataset with TOD variables for all appliances
myvarshhtod <- c("grid_and_mghh","q501_A_d_tod_Incadesbulb","q501_B_d_tod_CFL","q501_C_d_tod_LED","q501_D_d_tod_tubelight","q501_F_d_tod_ceilingfan", "q501_G_d_tod_tablefan","q501_I_d_tod_cooler","q501_H_d_tod_tv","q501_J_d_tod_elec_stove","q501_K_d_tod_comp_laptop","q501_L_d_tod_fridge","q501_M_d_tod_iron","q501_N_d_tod_grinder","q501_O_d_tod_music_sys","q501_P_d_tod_ac", "q501_Q_d_tod_washing_mc", "q501_R_d_tod_fodder_mc","q501_S_d_tod_water_pump","q501_T_d_tod_others")
finaldatatod <- finaldatahh[myvarshhtod]                 
                
#creating key based on source and TOD when appliance is used
finaldatatod$key_incan <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_A_d_tod_Incadesbulb)
finaldatatod$key_cfl   <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_B_d_tod_CFL)
finaldatatod$key_led   <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_C_d_tod_LED)
finaldatatod$key_tube  <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_D_d_tod_tubelight)
finaldatatod$key_ceiling <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_F_d_tod_ceilingfan)
finaldatatod$key_tablefan <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_G_d_tod_tablefan)
finaldatatod$key_tv <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_H_d_tod_tv)
finaldatatod$key_cooler <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_I_d_tod_cooler)
finaldatatod$key_stove <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_J_d_tod_elec_stove)
finaldatatod$key_complap <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_K_d_tod_comp_laptop)
finaldatatod$key_fridge <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_L_d_tod_fridge)
finaldatatod$key_iron <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_M_d_tod_iron)
finaldatatod$key_grinder <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_N_d_tod_grinder)
finaldatatod$key_musicsys <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_O_d_tod_music_sys)
finaldatatod$key_ac <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_P_d_tod_ac)
finaldatatod$key_washingmc <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_Q_d_tod_washing_mc)
finaldatatod$key_fodder <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_R_d_tod_fodder_mc)
finaldatatod$key_waterpump <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_S_d_tod_water_pump)
finaldatatod$key_others <- paste(finaldatatod$grid_and_mghh, finaldatatod$q501_T_d_tod_others)

#creating dataframes
incan_df = data.frame(table(finaldatatod$key_incan))
cfl_df = data.frame(table(finaldatatod$key_cfl))
led_df = data.frame(table(finaldatatod$key_led))
tube_df = data.frame(table(finaldatatod$key_tube))
ceiling_df = data.frame(table(finaldatatod$key_ceiling))
tablefan_df = data.frame(table(finaldatatod$key_tablefan))
cooler_df = data.frame(table(finaldatatod$key_cooler))
tv_df = data.frame(table(finaldatatod$key_tv))
fridge_df = data.frame(table(finaldatatod$key_fridge))
stove_df = data.frame(table(finaldatatod$key_stove))
complap_df = data.frame(table(finaldatatod$key_complap))
iron_df = data.frame(table(finaldatatod$key_iron))
grinder_df = data.frame(table(finaldatatod$key_grinder))
music_df = data.frame(table(finaldatatod$key_musicsys))
ac_df = data.frame(table(finaldatatod$key_ac))
washingmc_df = data.frame(table(finaldatatod$key_washingmc))
fodder_df = data.frame(table(finaldatatod$key_fodder))
waterpump_df = data.frame(table(finaldatatod$key_waterpump))
others_df = data.frame(table(finaldatatod$key_others))

#renaming Var 1 as Key
names(incan_df)[names(incan_df) == "Var1"] <- "Key"
names(cfl_df)[names(cfl_df) == "Var1"] <- "Key"
names(led_df)[names(led_df) == "Var1"] <- "Key"
names(tube_df)[names(tube_df) == "Var1"] <- "Key"
names(ceiling_df)[names(ceiling_df) == "Var1"] <- "Key"
names(tablefan_df)[names(tablefan_df) == "Var1"] <- "Key"
names(cooler_df)[names(cooler_df) == "Var1"] <- "Key"
names(tv_df)[names(tv_df) == "Var1"] <- "Key"
names(fridge_df)[names(fridge_df) == "Var1"] <- "Key"
names(stove_df)[names(stove_df) == "Var1"] <- "Key"
names(complap_df)[names(complap_df) == "Var1"] <- "Key"
names(iron_df)[names(iron_df) == "Var1"] <- "Key"
names(grinder_df)[names(grinder_df) == "Var1"] <- "Key"
names(music_df)[names(music_df) == "Var1"] <- "Key"
names(waterpump_df)[names(waterpump_df) == "Var1"] <- "Key"
names(ac_df)[names(ac_df) == "Var1"] <- "Key"
names(washingmc_df)[names(washingmc_df) == "Var1"] <- "Key"
names(fodder_df)[names(fodder_df) == "Var1"] <- "Key"
names(others_df)[names(others_df) == "Var1"] <- "Key"

#adding appliance names to the dataframes
incan_df$Appliance <- "Incandescent Bulb"
cfl_df$Appliance <- "CFL"
led_df$Appliance <- "LED"
tube_df$Appliance <- "Tubelight"
ceiling_df$Appliance <- "Ceiling Fan"
tablefan_df$Appliance <- "Table Fan"
cooler_df$Appliance <- "Cooler"
tv_df$Appliance <- "TV"
fridge_df$Appliance <- "Refrigerator"
stove_df$Appliance <- "Electric Stove"
complap_df$Appliance <- "Computer/Laptop"
iron_df$Appliance <- "Electric Iron"
grinder_df$Appliance <- "Mixer Grinder"
music_df$Appliance <- "Electric Radio/Music System"
ac_df$Appliance <- "Electric Radio/Music System"
washingmc_df$Appliance <- "Electric Radio/Music System"
fodder_df$Appliance <- "Fodder Machine"
waterpump_df$Appliance <- "Water Pump"
others_df$Appliance <- "Others"

##another way (by combining few appliances together)
incan_df$Appliance1 <- "Lighting"
cfl_df$Appliance1 <- "Lighting"
led_df$Appliance1 <- "Lighting"
tube_df$Appliance1 <- "Lighting"
ceiling_df$Appliance1 <- "Cooling"
tablefan_df$Appliance1 <- "Cooling"
cooler_df$Appliance1 <- "Cooling"
tv_df$Appliance1 <- "TV"
fridge_df$Appliance1 <- "Refrigerator"
stove_df$Appliance1 <- "Electric Stove"
complap_df$Appliance1 <- "Computer/Laptop"
iron_df$Appliance1 <- "Electric Iron"
grinder_df$Appliance1 <- "Mixer Grinder"
music_df$Appliance1 <- "Electric Radio/Music System"
ac_df$Appliance1 <- "AC"
washingmc_df$Appliance1 <- "Washing Machine"
fodder_df$Appliance1 <- "Fodder Machine"
waterpump_df$Appliance1 <- "Water Pump"
others_df$Appliance1 <- "Others"

#combining the separate appliance dataframes
combinedtodhh <-rbind(incan_df, cfl_df, led_df, tube_df,
                    ceiling_df, tablefan_df, cooler_df,
                    tv_df, fridge_df, stove_df,complap_df,iron_df,grinder_df,music_df,ac_df,washingmc_df,fodder_df,waterpump_df,others_df)

#making a separate variable for source of electricity from Key variable
combinedtodhh$grid_and_mghh <-substr(combinedtodhh$Key,1,1)

#making a separate variable for TOD when appliance us used from Key Variable

#need to install package 'stringi' for this
install.packages("stringi")
#for me the simple command of install packages didn't work so i used the following code
install.packages("stringi", dependencies=TRUE, INSTALL_opts = c('--no-lock'))
#loading stringi package
library(stringi)

combinedtodhh$TOD<-substr(combinedtodhh$Key,2,stri_length(combinedtodhh$Key))

#creating barplot for Time of the day when appliances are used for grid users
#again using ggplot2

#before creating ggplot, creating a new variable Tod
combinedtodhh$Tod1 <- "Not Used"
combinedtodhh$Tod1[combinedtodhh$TOD==" 1"] <- "g. Morning Only" 
combinedtodhh$Tod1[combinedtodhh$TOD==" 2"] <-  "d. Afternoon only"
combinedtodhh$Tod1[combinedtodhh$TOD==" 1 2"] <-  "e.Morning and Afternoon"
combinedtodhh$Tod1[combinedtodhh$TOD==" 1 3"] <-   "f. Morning and Evening"
combinedtodhh$Tod1[combinedtodhh$TOD==" 2 3"] <-  "b. After and Evening"
combinedtodhh$Tod1[combinedtodhh$TOD==" 3"] <-  "c. Evening Only"
combinedtodhh$Tod1[combinedtodhh$TOD==" 1 2 3"] <- "a. Morning Afternoon Evening"

#now creating BARPLOTS for TOD when Appliance is used
#now creating ggplot for GRID USERS
gridtod <- ggplot(data = combinedtod[(combinedtod$grid_and_mg == "2")&!(is.na(combinedtod$TOD) | combinedtod$TOD==" "),], 
                  aes(x = factor(Appliance, levels = c("Incandescent Bulb", "CFL", "LED", "Tubelight", "Ceiling Fan", 
                                                       "Table Fan","Cooler", "TV", "Refrigerator", "Sewing Machine",
                                                       "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                      y = Freq, fill =Tod1)) + 
  geom_bar(stat="identity") +
  ggtitle("Time of the day when appliances are used by grid users")+ #title of plot
  scale_y_continuous(limits=c(0,150),breaks=seq(0,150, by = 25),expand = c(0,0))+
  theme_bw()+
  xlab("Appliances")+
  ylab("Count")+
  theme(plot.title = element_text(size=11))+
  theme(legend.title = element_text(colour="black", size=9)) +
  theme(axis.title=element_text(size=11))+
  theme(axis.text.x = element_text(size = 9, color ="black")) + #formatting x axis
  scale_fill_manual("TOD",labels = c("5am-11am", "11am-5pm", "5am-5pm", "5am-11am & 5pm-11pm",
                                     "11am-11pm", "5pm-11pm", "5am-11pm"),
                    values = c("#8DA0CB","#FC8D62", "#A6D854", "#FFD92F","#E5C494","lightpink1", 
                               "#66C2A5"))+ 
  
  coord_flip()  

gridtod

##now for when lighting and cooling are combined and not including a few time variables
#removing time variables when TOD = 1 or TOD = 1 2 or TOD=1 3
combinedtodhh1 <- combinedtodhh[combinedtodhh$Tod1=="a. Morning Afternoon Evening"|combinedtodhh$Tod1=="b. After and Evening"|
                              combinedtodhh$Tod1=="c. Evening Only"|combinedtodhh$Tod1=="d. Afternoon only",]

gridtod1 <- ggplot(data = combinedtod1[(combinedtod1$grid_and_mg == "2")&!(is.na(combinedtod1$Tod1) | combinedtod1$Tod1==" "),], 
                   aes(x = factor(Appliance1, levels = c("Lighting", "Cooling", "TV", "Refrigerator", "Sewing Machine",
                                                         "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                       y = Freq, fill =Tod1)) + 
  geom_bar(stat="identity") +
  ggtitle("Time of the day when appliances are used by grid users")+ #title of plot
  scale_y_continuous(limits=c(0,150),breaks=seq(0,150, by = 25),expand = c(0,0))+
  theme_bw()+
  xlab("Appliances")+
  ylab("Count")+
  theme(plot.title = element_text(size=11))+
  theme(legend.title = element_text(colour="black", size=9)) +
  theme(axis.title=element_text(size=11))+
  theme(axis.text.x = element_text(size = 9, color ="black")) + #formatting x axis
  scale_fill_manual("TOD",labels = c("11am-5pm",
                                     "11am-11pm", "5pm-11pm", "5am-11pm"),
                    values = c("#FC8D62","#E5C494", "lightpink1", 
                               "#66C2A5"))+ 
  
  
  coord_flip()  

gridtod1

#Appliance Use in % format
gridtodprop <- ggplot(data = combinedtod[(combinedtod$grid_and_mg == "2")&!(is.na(combinedtod$TOD) | combinedtod$TOD==" "),], 
                      aes(x = factor(Appliance, levels = c("Incandescent Bulb", "CFL", "LED", "Tubelight", "Ceiling Fan", 
                                                           "Table Fan","Cooler", "TV", "Refrigerator", "Sewing Machine",
                                                           "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                          y = Freq, fill =Tod1)) + 
  geom_bar(position = "fill", stat="identity") +
  ggtitle("Time of the day when appliances are used by grid users")+ #title of plot
  scale_y_continuous(expand = c(0,0), labels = scales::percent)+
  theme_bw()+
  xlab("Appliances")+
  ylab("Percent")+
  theme(plot.title = element_text(size=11))+
  theme(legend.title = element_text(colour="black", size=9)) +
  theme(axis.title=element_text(size=10))+
  theme(axis.text.x = element_text(size = 9, color ="black"))  #formatting x axis
scale_fill_manual("TOD",labels = c("5am-11am", "11am-5pm", "5am-5pm", "5am-11am & 5pm-11pm",
                                   "11am-11pm", "5pm-11pm", "5am-11pm"),
                  values = c("#8DA0CB","#FC8D62", "#A6D854", "#FFD92F","#E5C494","lightpink1", 
                             "#66C2A5"))+ 
  
  coord_flip()  

gridtodprop

######################
### appliance use % format

gridtod1prop <- ggplot(data = combinedtod1[(combinedtod1$grid_and_mg == "2")&!(is.na(combinedtod1$Tod1) | combinedtod1$Tod1==" "),], 
                       aes(x = factor(Appliance1, levels = c("Lighting", "Cooling", "TV", "Refrigerator", "Sewing Machine",
                                                             "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                           y = Freq, fill =Tod1)) + 
  geom_bar(position = "fill", stat="identity") +
  ggtitle("Time of the day when appliances are used by grid users")+ #title of plot
  scale_y_continuous(expand = c(0,0), labels = scales::percent)+
  theme_bw()+
  xlab("Appliances")+
  ylab("Percent")+
  theme(plot.title = element_text(size=15))+
  theme(legend.title = element_text(colour="black", size=10)) +
  theme(axis.title=element_text(size=13))+
  theme(axis.text = element_text(size = 13, color ="black"))  #formatting x axis
scale_fill_manual("TOD",labels = c("11am-5pm",
                                   "11am-11pm", "5pm-11pm", "5am-11pm"),
                  values = c("#FC8D62","#E5C494", "lightpink1", 
                             "#66C2A5"))+  
  
  coord_flip()  

gridtod1prop


#now creating barplot for MINI-GRID Users
mgridtod <- ggplot(data = combinedtod[(combinedtod$grid_and_mg == "3")&!(is.na(combinedtod$TOD) | combinedtod$TOD==" "),], 
                   aes(x = factor(Appliance, levels = c("Incandescent Bulb", "CFL", "LED", "Tubelight", "Ceiling Fan", 
                                                        "Table Fan","Cooler", "TV", "Refrigerator", "Sewing Machine",
                                                        "Computer", "Printer", "Weighing Machine", "Flour Mill", "Others")),
                       y = Freq, fill =Tod1)) + 
  geom_bar(stat="identity") +
  ggtitle("Time of the day when appliances are used by mini-grid users")+ #title of plot
  scale_y_continuous(limits=c(0,150),breaks=seq(0,150, by = 25),expand = c(0,0))+
  theme_bw()+
  xlab("Appliances")+
  ylab("Count")+
  theme(plot.title = element_text(size=11))+
  theme(legend.title = element_text(colour="black", size=9)) +
  theme(axis.title=element_text(size=11))+
  theme(axis.text.x = element_text(size = 9, color ="black")) + #formatting x axis
  scale_fill_manual("TOD",labels = c("5am-11am", "11am-5pm", "5am-5pm", "5am-11am & 5pm-11pm",
                                     "11am-11pm", "5pm-11pm"),
                    values = c("#8DA0CB","#FC8D62","#A6D854", "#FFD92F","#E5C494","lightpink1"))+ 
  
  coord_flip()  

mgridtod

########################
## appliance use % format minigrid (code for Fig. 6) ## 
#########################

mgridtod1prophh <- ggplot(data = combinedtodhh1[(combinedtodhh1$grid_and_mghh == "3")&!(is.na(combinedtodhh1$Tod1) | combinedtodhh1$Tod1==" "),], 
                        aes(x = factor(Appliance1, levels = c("Lighting", "Cooling", "TV", "Refrigerator", "Electric Stove",
                                                              "Computer/Laptop", "Electric Iron", "Mixer Grinder", 
                                                              "Music System","AC", "Washing Machine", "Fodder Machine",
                                                              "Water Pump", "Others")),
                            y = Freq, fill =Tod1)) + 
  geom_bar(position = "fill", stat="identity") +
  ggtitle("Households")+ #title of plot
  scale_y_continuous(expand = c(0,0), labels = scales::percent)+
  theme_bw()+
  xlab("Appliances")+
  ylab("Percent")+
  theme(plot.title = element_text(size=20, hjust = 0.5))+
  theme(legend.title = element_text(colour="black", size=18)) +
  theme(legend.text = element_text(size = 14))+
  theme(axis.title=element_text(size=18))+
  theme(axis.text = element_text(size = 18, color ="black"))+ #formatting x axis
  scale_fill_manual("TOD",labels = c("Throughout the day (5am-11pm)",
                                     "Afternoon and/ evening (11am-11pm)", 
                                     "Evening only (5pm-11pm)",
                                     "Afternoon only (11am-5pm)"),
                    values = c("#7e88ba","#9fb2c3","#aed886","#f2e880"))+
  theme(legend.position = "none") +
  coord_flip()  
  

mgridtod1prophh


#################################
##cost (Table 3 and 4)
#################################
finaldatahhcost$con <-paste(finaldatacost$q501_appl_used1,finaldatacost$q501_appl_used2,finaldatacost$q501_appl_used3,finaldatacost$q501_appl_used4,finaldatacost$q501_appl_used5,finaldatacost$q501_appl_used6,finaldatacost$q501_appl_used7,finaldatacost$q501_appl_used8,finaldatacost$q501_appl_used9)
finaldatacost$con1 <-gsub("\\s", "", finaldatacost$con)
install.packages("stringr")
library(stringr)

library(stringi)
set.seed(123L)
value <- stri_rand_strings(10000, ceiling(runif(10000, 1, 100))) # 10000 random ASCII strings
head(value)

HP<-'o|p'
MP<-'h|j|k|l|m|n'
Light<-'a|b|c|d'
Cool<-'f|g|i'
d<-"c"
stri_detect_fixed(d,HP)
any(stri_detect_fixed(d,Cool))

finaldatacost$HP <- str_detect(finaldatacost$con1,HP)
finaldatacost$MP <- str_detect(finaldatacost$con1,MP)
finaldatacost$Light <- str_detect(finaldatacost$con1,Light)
finaldatacost$Cool <- str_detect(finaldatacost$con1,Cool)

finaldatacost$app_grp <-"99. Others"
finaldatacost$app_grp[finaldatacost$Light==TRUE & finaldatacost$Cool==FALSE & finaldatacost$MP==FALSE & finaldatacost$HP==FALSE] <- "01. Light Only"
finaldatacost$app_grp[finaldatacost$Light==TRUE & finaldatacost$Cool==TRUE & finaldatacost$MP==FALSE & finaldatacost$HP==FALSE] <- "02. Light and Cooling Only"
finaldatacost$app_grp[finaldatacost$MP==TRUE] <- "03. Medium Powered Only"
finaldatacost$app_grp[finaldatacost$HP==TRUE] <- "04. High Powered Only"

agg <- aggregate(list(finaldatacost$q303_grid_initial_cost,finaldatacost$gridmonthlybill), by=list(finaldatacost$app_grp,finaldatacost$grid_and_mg), FUN=mean,na.rm=TRUE)    
names(agg)[1:4] <- c("Appliance","Grid Type","Avg Initial Cost Grid","Avg Monthly Bill Grid") 

agg1 <- aggregate(list(finaldatacost$q337_mgrid_initial_cost,finaldatacost$q340_e_mgrid_bill_amt2), by=list(finaldatacost$app_grp,finaldatacost$grid_and_mg), FUN=mean,na.rm=TRUE)    
names(agg1)[1:4] <- c("Appliance","Grid Type","Avg Initial Cost MGrid","Avg Monthly Bill MGrid") 

################################
##reasons for not using minigrid (Figure 8)
###############################
##plot based on CH 5 shared by Shalu
dfnomgrid <- data.frame("UseAlternative"=50,
                        "DontNeedElec"=2,
                        "CantAfford"=55,
                        "NoConnection"=3,
                        "Unreliable"=11,
                        stringsAsFactors = FALSE)

nomgridplot <- barplot(as.matrix(dfnomgrid),
                       beside=T,
                       ylim = c(0,100),
                       main="Households",
                       ylab = "Percent",
                       col="lightpink1",
                       font.main=1,
                       cex.axis = 1.6,
                       cex.main = 1.8,
                       cex.names = 1.2,
                       cex.lab = 1.8,
                       space = c(0,0.4))

y23 <- round(as.matrix(dfnomgrid),1)
text(nomgridplot,y23+3.7,labels=as.character(y23), cex=1.5)





